{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-08-09T17:44:16.881236",
     "start_time": "2017-08-09T17:44:16.878235"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Intro\n",
    "This notebook explores sorting algorithms mostly for interviews purposes."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple Sorts\n",
    "* Insertion sort\n",
    "* Selection sort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-02T10:32:25.820610",
     "start_time": "2017-05-02T10:32:25.813609"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a = np.random.randint(0, 1000, 100)\n",
    "a_sorted = sorted(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-02T10:42:27.629031",
     "start_time": "2017-05-02T10:42:27.621031"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# first naive implementation (which turned out to be insertion sort)\n",
    "def sort(a):\n",
    "    a = a.copy()\n",
    "    for i in range(len(a)-1):\n",
    "        if a[i+1]<a[i]:\n",
    "            tmp = a[i]\n",
    "            a[i] = a[i+1]\n",
    "            a[i+1] = tmp\n",
    "            for j in range(i-1, -1, -1):\n",
    "                if a[j]>a[j+1]:\n",
    "                    tmp = a[j]\n",
    "                    a[j] = a[j+1]\n",
    "                    a[j+1] = tmp\n",
    "                else:\n",
    "                    break\n",
    "    return a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-12T11:54:07.602070",
     "start_time": "2017-05-12T11:54:07.597070"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# structurally improved\n",
    "def insertion_sort(a):\n",
    "    a = a.copy()\n",
    "    for i in range(len(a)):\n",
    "        j = i\n",
    "        while j>0 and a[j]<a[j-1]:\n",
    "            tmp = a[j]\n",
    "            a[j] = a[j-1]\n",
    "            a[j-1] = tmp\n",
    "            j -= 1\n",
    "    return a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-12T11:54:17.032610",
     "start_time": "2017-05-12T11:54:17.023609"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# selection sort\n",
    "def selection_sort(a):\n",
    "    a = a.copy()\n",
    "    for i in range(len(a)-1):\n",
    "        min_val = a[i]\n",
    "        min_idx = i\n",
    "        for j in range(i+1, len(a)):\n",
    "            if a[j] < min_val:\n",
    "                min_val = a[j]\n",
    "                min_idx = j\n",
    "        if min_idx != i:\n",
    "            a[min_idx] = a[i]\n",
    "            a[i] = min_val\n",
    "    return a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-02T13:51:18.987028",
     "start_time": "2017-05-02T13:51:18.981028"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  2,   6,   9,  24,  29,  39,  41,  48,  52,  54,  65,  66,  79,\n",
       "        79,  85, 112, 118, 157, 172, 201, 214, 218, 234, 246, 246, 263,\n",
       "       267, 270, 273, 278, 279, 291, 292, 294, 298, 327, 358, 362, 375,\n",
       "       378, 378, 388, 390, 397, 401, 402, 426, 426, 428, 432, 443, 447,\n",
       "       485, 495, 499, 500, 513, 529, 585, 585, 605, 608, 611, 616, 651,\n",
       "       655, 664, 668, 676, 682, 685, 685, 689, 690, 702, 743, 779, 797,\n",
       "       798, 809, 816, 817, 821, 832, 853, 871, 872, 875, 904, 907, 909,\n",
       "       915, 921, 930, 943, 969, 980, 984, 990, 992])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sort(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bubble Sort\n",
    "$n | n^2 | n^2 | 1 |Yes$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-08-09T09:51:08.324965",
     "start_time": "2017-08-09T09:51:08.306964"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def bubble_sort(a):\n",
    "    a = a.copy()\n",
    "    is_sorted = False\n",
    "    while not is_sorted:\n",
    "        is_sorted = True\n",
    "        for i in range(len(a)-1):\n",
    "            if a[i] > a[i+1]:\n",
    "                a[i], a[i+1] = a[i+1], a[i]\n",
    "                is_sorted = False\n",
    "    return a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-08-09T09:59:05.256244",
     "start_time": "2017-08-09T09:54:58.015103"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 loops, best of 1000: 2.41 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit -r 1000 bubble_sort(np.random.randint(0, 1000, 100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Efficient Sorts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Merge Sort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-08-09T09:51:46.981176",
     "start_time": "2017-08-09T09:51:46.957175"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def merge_sort(a):\n",
    "    if len(a) <= 1:\n",
    "        return a\n",
    "    \n",
    "    left = merge_sort(a[:len(a)//2])\n",
    "    right = merge_sort(a[len(a)//2:])\n",
    "    \n",
    "    return merge(left, right)\n",
    "\n",
    "def merge(left, right):\n",
    "    res = []\n",
    "    while len(left)>0 and len(right)>0:\n",
    "        if left[0] < right[0]:\n",
    "            res.append(left.pop(0))\n",
    "        else:\n",
    "            res.append(right.pop(0))\n",
    "    if len(left) == 0:\n",
    "        res += right\n",
    "    else:\n",
    "        res += left\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-08-09T09:54:43.102250",
     "start_time": "2017-08-09T09:54:41.558161"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000 loops, best of 3: 368 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit merge_sort(list(np.random.randint(0, 1000, 100)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-12T12:12:28.351029",
     "start_time": "2017-05-12T12:12:28.338029"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0,\n",
       " 1,\n",
       " 4,\n",
       " 11,\n",
       " 32,\n",
       " 42,\n",
       " 59,\n",
       " 59,\n",
       " 71,\n",
       " 87,\n",
       " 94,\n",
       " 120,\n",
       " 127,\n",
       " 163,\n",
       " 182,\n",
       " 192,\n",
       " 195,\n",
       " 196,\n",
       " 221,\n",
       " 229,\n",
       " 230,\n",
       " 234,\n",
       " 238,\n",
       " 246,\n",
       " 256,\n",
       " 260,\n",
       " 272,\n",
       " 277,\n",
       " 297,\n",
       " 302,\n",
       " 320,\n",
       " 322,\n",
       " 330,\n",
       " 334,\n",
       " 338,\n",
       " 344,\n",
       " 349,\n",
       " 363,\n",
       " 410,\n",
       " 424,\n",
       " 439,\n",
       " 442,\n",
       " 471,\n",
       " 474,\n",
       " 480,\n",
       " 492,\n",
       " 501,\n",
       " 512,\n",
       " 513,\n",
       " 515,\n",
       " 518,\n",
       " 530,\n",
       " 543,\n",
       " 547,\n",
       " 555,\n",
       " 581,\n",
       " 611,\n",
       " 614,\n",
       " 622,\n",
       " 627,\n",
       " 636,\n",
       " 639,\n",
       " 643,\n",
       " 661,\n",
       " 664,\n",
       " 672,\n",
       " 683,\n",
       " 694,\n",
       " 706,\n",
       " 732,\n",
       " 737,\n",
       " 743,\n",
       " 755,\n",
       " 769,\n",
       " 770,\n",
       " 773,\n",
       " 783,\n",
       " 793,\n",
       " 809,\n",
       " 820,\n",
       " 820,\n",
       " 826,\n",
       " 845,\n",
       " 853,\n",
       " 874,\n",
       " 888,\n",
       " 889,\n",
       " 895,\n",
       " 896,\n",
       " 902,\n",
       " 904,\n",
       " 911,\n",
       " 925,\n",
       " 940,\n",
       " 961,\n",
       " 978,\n",
       " 980,\n",
       " 991,\n",
       " 992,\n",
       " 996]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merge_sort(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Quick Sort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-12T12:37:14.556035",
     "start_time": "2017-05-12T12:37:14.544035"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def quick_sort(a, lo=0, hi=len(a)-1):\n",
    "    if lo < hi:\n",
    "        p = partition(a, lo, hi)\n",
    "        quick_sort(a, lo, p-1)\n",
    "        quick_sort(a, p+1, hi)\n",
    "    return a\n",
    "    \n",
    "def partition(a, lo, hi):\n",
    "    pivot = a[hi]\n",
    "    i = lo-1\n",
    "    for j in range(lo, hi):\n",
    "        if a[j]<pivot:\n",
    "            i += 1\n",
    "            tmp = a[i]\n",
    "            a[i] = a[j]\n",
    "            a[j] = tmp\n",
    "    a[hi] = a[i+1]\n",
    "    a[i+1] = pivot\n",
    "    return i+1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-05-12T12:37:14.945058",
     "start_time": "2017-05-12T12:37:14.939057"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0,\n",
       " 1,\n",
       " 4,\n",
       " 11,\n",
       " 32,\n",
       " 42,\n",
       " 59,\n",
       " 59,\n",
       " 71,\n",
       " 87,\n",
       " 94,\n",
       " 120,\n",
       " 127,\n",
       " 163,\n",
       " 182,\n",
       " 192,\n",
       " 195,\n",
       " 196,\n",
       " 221,\n",
       " 229,\n",
       " 230,\n",
       " 234,\n",
       " 238,\n",
       " 246,\n",
       " 256,\n",
       " 260,\n",
       " 272,\n",
       " 277,\n",
       " 297,\n",
       " 302,\n",
       " 320,\n",
       " 322,\n",
       " 330,\n",
       " 334,\n",
       " 338,\n",
       " 344,\n",
       " 349,\n",
       " 363,\n",
       " 410,\n",
       " 424,\n",
       " 439,\n",
       " 442,\n",
       " 471,\n",
       " 474,\n",
       " 480,\n",
       " 492,\n",
       " 501,\n",
       " 512,\n",
       " 513,\n",
       " 515,\n",
       " 518,\n",
       " 530,\n",
       " 543,\n",
       " 547,\n",
       " 555,\n",
       " 581,\n",
       " 611,\n",
       " 614,\n",
       " 622,\n",
       " 627,\n",
       " 636,\n",
       " 639,\n",
       " 643,\n",
       " 661,\n",
       " 664,\n",
       " 672,\n",
       " 683,\n",
       " 694,\n",
       " 706,\n",
       " 732,\n",
       " 737,\n",
       " 743,\n",
       " 755,\n",
       " 769,\n",
       " 770,\n",
       " 773,\n",
       " 783,\n",
       " 793,\n",
       " 809,\n",
       " 820,\n",
       " 820,\n",
       " 826,\n",
       " 845,\n",
       " 853,\n",
       " 874,\n",
       " 888,\n",
       " 889,\n",
       " 895,\n",
       " 896,\n",
       " 902,\n",
       " 904,\n",
       " 911,\n",
       " 925,\n",
       " 940,\n",
       " 961,\n",
       " 978,\n",
       " 980,\n",
       " 991,\n",
       " 992,\n",
       " 996]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quick_sort(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:kaggle]",
   "language": "python",
   "name": "conda-env-kaggle-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "67px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
